R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ readr   1.3.1
## ✔ tibble  2.1.3     ✔ purrr   0.3.2
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ ggplot2 3.2.1     ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(forcats)
library(ggthemes)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(knitr)
library(naniar)
library(broom)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggpubr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggplot2)
library(ggthemes)
library(RColorBrewer)
library(dplyr)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(tidyverse)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(xtable)

Including Plots

You can also embed plots, for example:

clean_water_quality_data <- readRDS("../../data/processed_data/clean_water_quality_data.rds")
#if you get an error - this happens because you need to save the file first cause it refers to a relative path 

TABLES Lets start with positives by site….

library('tigerstats')
## Loading required package: abd
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: grid
## Loading required package: mosaic
## Loading required package: ggformula
## Loading required package: ggstance
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following object is masked from 'package:caret':
## 
##     dotPlot
## The following object is masked from 'package:plotly':
## 
##     do
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
## The following object is masked from 'package:purrr':
## 
##     cross
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median,
##     prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
## Welcome to tigerstats!
## To learn more about this package, consult its website:
##  http://homerhanumat.github.io/tigerstats
library('formattable')
## 
## Attaching package: 'formattable'
## The following object is masked from 'package:xtable':
## 
##     digits
## The following object is masked from 'package:plotly':
## 
##     style
library('gridExtra')

To do this we need to alter the data first

new_wq <- clean_water_quality_data
new_wq$arco_new <- new_wq$A_butzleri_HSP60
new_wq$HF183_new <- new_wq$HF183
unique(new_wq$arco_new)
##   [1] 3.391993 3.693023 3.316096 4.368659 4.283681 3.299289 3.869818
##   [8] 4.383025 4.112404 3.979002 4.172147 3.423574 4.964410 3.677424
##  [15] 4.133411 3.940118 4.549077 4.807535 5.000522 3.671747 4.060388
##  [22] 4.037148 4.281111 3.810609 4.207322 3.434345 3.757290 3.415140
##  [29] 3.963948 4.335562 4.378710 4.374858 4.295717 4.364660 4.091597
##  [36] 4.016699 3.541579 4.530891 4.453012 4.242939 4.130527 4.263731
##  [43] 4.430978 3.953953 3.826593 3.887054 3.836197 3.646796 3.657820
##  [50] 3.959709 3.493319 4.116608 3.835056 3.686636 3.871573 4.284372
##  [57] 4.423092 3.734224 3.438257 4.123329 3.645029 3.767156 3.872972
##  [64] 4.093912 4.655004 3.857694 3.466571 3.658965 3.422590 3.744293
##  [71] 3.651472 3.697752 3.970440 3.316599 3.594393 3.497565 3.800703
##  [78] 3.678300 3.379124 3.709049 3.296928 3.446537 4.766889 4.294025
##  [85] 3.617000 3.491726 4.465680 3.309630 3.509740 4.040484 4.214505
##  [92] 3.875686 3.273696 4.558310 3.627755 4.193125 3.952792 3.584263
##  [99] 4.155221 4.166004
str(new_wq)
## 'data.frame':    533 obs. of  26 variables:
##  $ Date                      : chr  "May 9 2017" "May 9 2017" "May 9 2017" "May 9 2017" ...
##  $ Date_Sampled              : Date, format: "2017-05-09" "2017-05-09" ...
##  $ Week                      : chr  "Week 1" "Week 1" "Week 1" "Week 1" ...
##  $ Pond                      : chr  "Country_Hills" "Inverness" "Country_Hills" "Inverness" ...
##  $ Sampling_Site             : chr  "Outfall WP31E" "Outfall_Inlet" "Outfall WP31C" "Outfall WP26B" ...
##  $ Sample_ID                 : chr  "29759" "29760" "29761" "29762" ...
##  $ Rainfall                  : chr  "0.2" "0.2" "0.2" "0.2" ...
##  $ HF183                     : num  3.33 3.33 3.33 3.33 3.33 ...
##  $ HumM2                     : num  3.85 3.85 3.85 3.85 3.85 ...
##  $ CG01                      : num  3.11 3.11 3.11 3.11 3.11 ...
##  $ LeeSg                     : num  3.28 3.28 3.28 3.28 3.28 ...
##  $ Dog3                      : num  3.16 3.16 3.16 3.16 3.16 ...
##  $ MuBac                     : num  3.55 3.55 3.55 3.55 3.55 ...
##  $ Rum2Bac                   : num  3.46 3.46 3.46 3.46 3.46 ...
##  $ A_butzleri_HSP60          : num  3.39 3.39 3.39 3.39 3.69 ...
##  $ Salmonella_spp_InvA       : num  3.17 3.17 3.17 3.17 3.17 ...
##  $ Campylobacter_spp_Van_Dkye: num  3.55 3.55 3.55 3.55 3.55 ...
##  $ Enterococcus_CCE          : num  2.12 2.06 2.36 2.25 2.09 ...
##  $ Shigatoxin_1              : chr  "negative" "NA" "negative" "NA" ...
##  $ Shigatoxin_2              : chr  "negative" "NA" "negative" "NA" ...
##  $ total_coliforms           : num  2.64 1.79 2.54 1.46 2.44 ...
##  $ E_coli                    : num  0.602 0 1.362 0 0 ...
##  $ Enteroalert               : chr  "0" "0" "1" "0.301029995663981" ...
##  $ thermotolerant_coliforms  : num  0.954 0.954 1 0.954 0.954 ...
##  $ arco_new                  : num  3.39 3.39 3.39 3.39 3.69 ...
##  $ HF183_new                 : num  3.33 3.33 3.33 3.33 3.33 ...
#convert characters to categorical varaibles
new_wq$Pond <- as.factor(as.character(new_wq$Pond))
new_wq$Sampling_Site <- as.factor(as.character(new_wq$Sampling_Site))
new_wq$Sampling_Site <- as.factor(as.character(new_wq$Sampling_Site))
str(new_wq)
## 'data.frame':    533 obs. of  26 variables:
##  $ Date                      : chr  "May 9 2017" "May 9 2017" "May 9 2017" "May 9 2017" ...
##  $ Date_Sampled              : Date, format: "2017-05-09" "2017-05-09" ...
##  $ Week                      : chr  "Week 1" "Week 1" "Week 1" "Week 1" ...
##  $ Pond                      : Factor w/ 3 levels "Country_Hills",..: 1 2 1 2 1 3 1 3 3 3 ...
##  $ Sampling_Site             : Factor w/ 13 levels "Inlet 3/4","Inlet PR60",..: 12 13 10 6 3 2 9 4 1 5 ...
##  $ Sample_ID                 : chr  "29759" "29760" "29761" "29762" ...
##  $ Rainfall                  : chr  "0.2" "0.2" "0.2" "0.2" ...
##  $ HF183                     : num  3.33 3.33 3.33 3.33 3.33 ...
##  $ HumM2                     : num  3.85 3.85 3.85 3.85 3.85 ...
##  $ CG01                      : num  3.11 3.11 3.11 3.11 3.11 ...
##  $ LeeSg                     : num  3.28 3.28 3.28 3.28 3.28 ...
##  $ Dog3                      : num  3.16 3.16 3.16 3.16 3.16 ...
##  $ MuBac                     : num  3.55 3.55 3.55 3.55 3.55 ...
##  $ Rum2Bac                   : num  3.46 3.46 3.46 3.46 3.46 ...
##  $ A_butzleri_HSP60          : num  3.39 3.39 3.39 3.39 3.69 ...
##  $ Salmonella_spp_InvA       : num  3.17 3.17 3.17 3.17 3.17 ...
##  $ Campylobacter_spp_Van_Dkye: num  3.55 3.55 3.55 3.55 3.55 ...
##  $ Enterococcus_CCE          : num  2.12 2.06 2.36 2.25 2.09 ...
##  $ Shigatoxin_1              : chr  "negative" "NA" "negative" "NA" ...
##  $ Shigatoxin_2              : chr  "negative" "NA" "negative" "NA" ...
##  $ total_coliforms           : num  2.64 1.79 2.54 1.46 2.44 ...
##  $ E_coli                    : num  0.602 0 1.362 0 0 ...
##  $ Enteroalert               : chr  "0" "0" "1" "0.301029995663981" ...
##  $ thermotolerant_coliforms  : num  0.954 0.954 1 0.954 0.954 ...
##  $ arco_new                  : num  3.39 3.39 3.39 3.39 3.69 ...
##  $ HF183_new                 : num  3.33 3.33 3.33 3.33 3.33 ...
#want to convert a new column with acro as yes or no to get the percent of the samples positive 
#lets do arco first
new_wq <- new_wq %>% mutate(arco_y_n= 
                      ifelse(arco_new < "3.391993", "no",
                      ifelse(arco_new > "3.391994", "yes",
                      "no")))
#Now HF183 and the rest
new_wq <- new_wq %>% mutate(HF183_y_n= 
                      ifelse(HF183_new < "3.329601", "no",
                      ifelse(HF183_new > "3.329602", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(HumM2_y_n= 
                      ifelse(HumM2 < "3.847449", "no",
                      ifelse(HumM2 > "3.847450", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(CG01_y_n= 
                      ifelse(CG01 < "3.108565", "no",
                      ifelse(CG01 > "3.108566", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(LeeSg_y_n= 
                      ifelse(LeeSg < "3.283979", "no",
                      ifelse(LeeSg > "3.283980", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(Dog3_y_n= 
                      ifelse(Dog3 < "3.156549", "no",
                      ifelse(Dog3 > "3.156550", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(MuBac_y_n= 
                      ifelse(MuBac < "3.554852", "no",
                      ifelse(MuBac > "3.554853", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(Rum2Bac_y_n= 
                      ifelse(Rum2Bac < "3.457125", "no",
                      ifelse(Rum2Bac > "3.457126", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(Salmonella_y_n= 
                      ifelse(Salmonella_spp_InvA < "3.170848", "no",
                      ifelse(Salmonella_spp_InvA > "3.170849", "yes",
                      "no")))
new_wq <- new_wq %>% mutate(Campy_y_n= 
                      ifelse(Campylobacter_spp_Van_Dkye < "3.554852", "no",
                      ifelse(Campylobacter_spp_Van_Dkye > "3.554853", "yes",
                      "no")))

#water quality indicators
new_wq <- new_wq %>% mutate(Entero_y_n= 
                      ifelse(Enterococcus_CCE < "3.102", "no",
                      ifelse(Enterococcus_CCE > "3.103", "yes",
                      "no")))

new_wq <- new_wq %>% mutate(thermo_y_n= 
                      ifelse(thermotolerant_coliforms < "2.602", "no",
                      ifelse(thermotolerant_coliforms > "2.603", "yes",
                      "no")))
xtabs(~Pond+Entero_y_n, data=new_wq)
##                Entero_y_n
## Pond             no yes
##   Country_Hills 180  25
##   Inverness     143  21
##   McCall_Lake   116  46
Entero_Pond<- rowPerc(xtabs(~Pond+Entero_y_n, data=new_wq))
png("../../results/Entero_pond_table.png", height=400, width=400)
p<-tableGrob(Entero_Pond)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
xtabs(~Pond+thermo_y_n, data=new_wq)
##                thermo_y_n
## Pond             no yes
##   Country_Hills 185  20
##   Inverness     160   3
##   McCall_Lake   133  31
thermo_pond<- rowPerc(xtabs(~Pond+thermo_y_n, data=new_wq))
png("../../results/thermo_pond_table.png", height=400, width=400)
p<-tableGrob(thermo_pond)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Arco positives by site 
xtabs(~Pond+arco_y_n, data=new_wq)
##                arco_y_n
## Pond             no yes
##   Country_Hills 166  39
##   Inverness     124  40
##   McCall_Lake   104  60
Arco_Pond<- rowPerc(xtabs(~Pond+arco_y_n, data=new_wq))
png("../../results/Arco_pond_table.png", height=400, width=400)
p<-tableGrob(Arco_Pond)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
Arco_Pond %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  save_kable(file = "../../results/Arco_Pond_table_4.png")


  #kable_as_image("../../results/test3")

#kableExtra::save_kable(Arco_Pond, file = "table.pdf")

  ##save_kable(file = "../../results/Arco_Pond_table_4.png")

##save(Arco_Pond, file = "../../results/Arco_Pond_table_2.RData")
#print(xtable(head(Arco_Pond_2)), type="html")

library(xtable)
print(xtable(Arco_Pond), type='html')
no yes Total
Country_Hills 80.98 19.02 100.00
Inverness 75.61 24.39 100.00
McCall_Lake 63.41 36.59 100.00
xtabs(~Pond+HF183_y_n, data=new_wq)
##                HF183_y_n
## Pond             no yes
##   Country_Hills 150  55
##   Inverness     140  24
##   McCall_Lake    94  70
HF183_y_n_pond<- rowPerc(xtabs(~Pond+HF183_y_n, data=new_wq))
png("../../results/HF183_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HF183_y_n_pond)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
xtabs(~Pond+HumM2_y_n, data=new_wq)
##                HumM2_y_n
## Pond             no yes
##   Country_Hills 190  15
##   Inverness     159   5
##   McCall_Lake   138  26
HumM2_y_n_pond<- rowPerc(xtabs(~Pond+HumM2_y_n, data=new_wq))
png("../../results/HumM2_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HumM2_y_n_pond)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Campy positives by site 
xtabs(~Sampling_Site+Campy_y_n, data=new_wq)
##                Campy_y_n
## Sampling_Site   no yes
##   Inlet 3/4     38   3
##   Inlet PR60    39   2
##   Inlet WP31B   42   0
##   Outfall ML1   38   3
##   Outfall ML2   38   3
##   Outfall WP26B 38   0
##   Outfall WP26C 39   0
##   Outfall WP26D 45   1
##   Outfall WP31A 40   1
##   Outfall WP31C 38   3
##   Outfall WP31D 37   4
##   Outfall WP31E 39   1
##   Outfall_Inlet 40   1
campy_site<- rowPerc(xtabs(~Sampling_Site+Campy_y_n, data=new_wq))
png("../../results/campy_site_table.png", height=400, width=400)
p<-tableGrob(campy_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Arco positives by site 
xtabs(~Sampling_Site+arco_y_n, data=new_wq)
##                arco_y_n
## Sampling_Site   no yes
##   Inlet 3/4     29  12
##   Inlet PR60    24  17
##   Inlet WP31B   30  12
##   Outfall ML1   27  14
##   Outfall ML2   24  17
##   Outfall WP26B 32   6
##   Outfall WP26C 35   4
##   Outfall WP26D 39   7
##   Outfall WP31A 36   5
##   Outfall WP31C 34   7
##   Outfall WP31D 36   5
##   Outfall WP31E 30  10
##   Outfall_Inlet 18  23
arco_site<- rowPerc(xtabs(~Sampling_Site+arco_y_n, data=new_wq))
png("../../results/arco_site_table.png", height=400, width=400)
p<-tableGrob(arco_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#save as df
save(arco_site, file = "../../results/saved_arco_site.RData")
#Salmonella positives by site 
xtabs(~Sampling_Site+Salmonella_y_n, data=new_wq)
##                Salmonella_y_n
## Sampling_Site   no yes
##   Inlet 3/4     41   0
##   Inlet PR60    41   0
##   Inlet WP31B   42   0
##   Outfall ML1   41   0
##   Outfall ML2   39   2
##   Outfall WP26B 38   0
##   Outfall WP26C 39   0
##   Outfall WP26D 46   0
##   Outfall WP31A 41   0
##   Outfall WP31C 41   0
##   Outfall WP31D 40   1
##   Outfall WP31E 40   0
##   Outfall_Inlet 40   1
Salmonella_site<- rowPerc(xtabs(~Sampling_Site+Salmonella_y_n, data=new_wq))
png("../../results/salmonella_site_table.png", height=400, width=400)
p<-tableGrob(Salmonella_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#HF183 positives by site 
xtabs(~Pond+HF183_y_n, data=new_wq)
##                HF183_y_n
## Pond             no yes
##   Country_Hills 150  55
##   Inverness     140  24
##   McCall_Lake    94  70
HF183_y_n_site<- rowPerc(xtabs(~Pond+HF183_y_n, data=new_wq))
png("../../results/HF183_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HF183_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#HumM2
xtabs(~Pond+HumM2_y_n, data=new_wq)
##                HumM2_y_n
## Pond             no yes
##   Country_Hills 190  15
##   Inverness     159   5
##   McCall_Lake   138  26
HumM2_y_n_site<- rowPerc(xtabs(~Pond+HumM2_y_n, data=new_wq))
png("../../results/HumM2_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HumM2_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#CG01 positives by site 
xtabs(~Pond+CG01_y_n, data=new_wq)
##                CG01_y_n
## Pond             no yes
##   Country_Hills 202   3
##   Inverness     162   2
##   McCall_Lake   157   7
CG01_y_n_site<- rowPerc(xtabs(~Pond+CG01_y_n, data=new_wq))
png("../../results/CG01_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(CG01_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Dog3 positives by site 
xtabs(~Pond+Dog3_y_n, data=new_wq)
##                Dog3_y_n
## Pond             no yes
##   Country_Hills 198   7
##   Inverness     163   1
##   McCall_Lake   159   5
Dog3_y_n_site<- rowPerc(xtabs(~Pond+Dog3_y_n, data=new_wq))
png("../../results/Dog3_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(Dog3_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Rum2Bac positives by site 
xtabs(~Pond+Rum2Bac_y_n, data=new_wq)
##                Rum2Bac_y_n
## Pond             no yes
##   Country_Hills 200   5
##   Inverness     163   1
##   McCall_Lake   161   3
Rum2Bac_y_n_site<- rowPerc(xtabs(~Pond+Rum2Bac_y_n, data=new_wq))
png("../../results/Rum2Bac_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(Rum2Bac_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#LeeSg positives by site 
xtabs(~Pond+LeeSg_y_n, data=new_wq)
##                LeeSg_y_n
## Pond             no yes
##   Country_Hills 185  20
##   Inverness     157   7
##   McCall_Lake   139  25
LeeSg_y_n_site<- rowPerc(xtabs(~Pond+LeeSg_y_n, data=new_wq))
png("../../results/LeeSg_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(LeeSg_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#MuBac positives by site 
xtabs(~Pond+MuBac_y_n, data=new_wq)
##                MuBac_y_n
## Pond             no yes
##   Country_Hills 202   3
##   Inverness     163   1
##   McCall_Lake   163   1
MuBac_y_n_site<- rowPerc(xtabs(~Pond+MuBac_y_n, data=new_wq))
png("../../results/MuBac_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(MuBac_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#HF183 positives by site 
xtabs(~Sampling_Site+HF183_y_n, data=new_wq)
##                HF183_y_n
## Sampling_Site   no yes
##   Inlet 3/4     34   7
##   Inlet PR60    27  14
##   Inlet WP31B   31  11
##   Outfall ML1   30  11
##   Outfall ML2    3  38
##   Outfall WP26B 34   4
##   Outfall WP26C 31   8
##   Outfall WP26D 41   5
##   Outfall WP31A 37   4
##   Outfall WP31C 30  11
##   Outfall WP31D 24  17
##   Outfall WP31E 28  12
##   Outfall_Inlet 34   7
HF183_y_n_site<- rowPerc(xtabs(~Sampling_Site+HF183_y_n, data=new_wq))
png("../../results/HF183_y_n_site_table.png", height=400, width=400)
p<-tableGrob(HF183_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#HumM2 positives by site 
xtabs(~Sampling_Site+HumM2_y_n, data=new_wq)
##                HumM2_y_n
## Sampling_Site   no yes
##   Inlet 3/4     39   2
##   Inlet PR60    39   2
##   Inlet WP31B   42   0
##   Outfall ML1   37   4
##   Outfall ML2   23  18
##   Outfall WP26B 37   1
##   Outfall WP26C 37   2
##   Outfall WP26D 45   1
##   Outfall WP31A 40   1
##   Outfall WP31C 38   3
##   Outfall WP31D 34   7
##   Outfall WP31E 36   4
##   Outfall_Inlet 40   1
HumM2_y_n_site<- rowPerc(xtabs(~Sampling_Site+HumM2_y_n, data=new_wq))
png("../../results/HumM2_y_n_site_table.png", height=400, width=400)
p<-tableGrob(HumM2_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#CG01 positives by site 
xtabs(~Sampling_Site+CG01_y_n, data=new_wq)
##                CG01_y_n
## Sampling_Site   no yes
##   Inlet 3/4     41   0
##   Inlet PR60    40   1
##   Inlet WP31B   41   1
##   Outfall ML1   37   4
##   Outfall ML2   39   2
##   Outfall WP26B 37   1
##   Outfall WP26C 39   0
##   Outfall WP26D 46   0
##   Outfall WP31A 41   0
##   Outfall WP31C 40   1
##   Outfall WP31D 41   0
##   Outfall WP31E 39   1
##   Outfall_Inlet 40   1
CG01_y_n_site<- rowPerc(xtabs(~Sampling_Site+CG01_y_n, data=new_wq))
png("../../results/CG01_y_n_site_table.png", height=400, width=400)
p<-tableGrob(CG01_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Dog3 positives by site 
xtabs(~Sampling_Site+Dog3_y_n, data=new_wq)
##                Dog3_y_n
## Sampling_Site   no yes
##   Inlet 3/4     40   1
##   Inlet PR60    40   1
##   Inlet WP31B   42   0
##   Outfall ML1   41   0
##   Outfall ML2   38   3
##   Outfall WP26B 37   1
##   Outfall WP26C 39   0
##   Outfall WP26D 46   0
##   Outfall WP31A 39   2
##   Outfall WP31C 38   3
##   Outfall WP31D 39   2
##   Outfall WP31E 40   0
##   Outfall_Inlet 41   0
Dog3_y_n_site<- rowPerc(xtabs(~Sampling_Site+Dog3_y_n, data=new_wq))
png("../../results/Dog3_y_n_site_table.png", height=400, width=400)
p<-tableGrob(Dog3_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Rum2Bac positives by site 
xtabs(~Sampling_Site+Rum2Bac_y_n, data=new_wq)
##                Rum2Bac_y_n
## Sampling_Site   no yes
##   Inlet 3/4     41   0
##   Inlet PR60    41   0
##   Inlet WP31B   40   2
##   Outfall ML1   39   2
##   Outfall ML2   40   1
##   Outfall WP26B 38   0
##   Outfall WP26C 39   0
##   Outfall WP26D 45   1
##   Outfall WP31A 41   0
##   Outfall WP31C 41   0
##   Outfall WP31D 38   3
##   Outfall WP31E 40   0
##   Outfall_Inlet 41   0
Rum2Bac_y_n_site<- rowPerc(xtabs(~Sampling_Site+Rum2Bac_y_n, data=new_wq))
png("../../results/Rum2Bac_y_n_site_table.png", height=400, width=400)
p<-tableGrob(Rum2Bac_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#LeeSg positives by site 
xtabs(~Sampling_Site+LeeSg_y_n, data=new_wq)
##                LeeSg_y_n
## Sampling_Site   no yes
##   Inlet 3/4     38   3
##   Inlet PR60    35   6
##   Inlet WP31B   40   2
##   Outfall ML1   34   7
##   Outfall ML2   32   9
##   Outfall WP26B 35   3
##   Outfall WP26C 39   0
##   Outfall WP26D 44   2
##   Outfall WP31A 39   2
##   Outfall WP31C 34   7
##   Outfall WP31D 36   5
##   Outfall WP31E 36   4
##   Outfall_Inlet 39   2
LeeSg_y_n_site<- rowPerc(xtabs(~Sampling_Site+LeeSg_y_n, data=new_wq))
png("../../results/LeeSg_y_n_site_table.png", height=400, width=400)
p<-tableGrob(LeeSg_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Entero by site
xtabs(~Sampling_Site+Entero_y_n, data=new_wq)
##                Entero_y_n
## Sampling_Site   no yes
##   Inlet 3/4     34   6
##   Inlet PR60    28  12
##   Inlet WP31B   38   4
##   Outfall ML1   34   7
##   Outfall ML2   20  21
##   Outfall WP26B 38   0
##   Outfall WP26C 34   5
##   Outfall WP26D 40   6
##   Outfall WP31A 40   1
##   Outfall WP31C 31  10
##   Outfall WP31D 35   6
##   Outfall WP31E 36   4
##   Outfall_Inlet 31  10
Entero_y_n_site<- rowPerc(xtabs(~Sampling_Site+Entero_y_n, data=new_wq))
png("../../results/Entero_y_n_site_table.png", height=400, width=400)
p<-tableGrob(Entero_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#Thermotoler coliforms by site
xtabs(~Sampling_Site+thermo_y_n, data=new_wq)
##                thermo_y_n
## Sampling_Site   no yes
##   Inlet 3/4     38   3
##   Inlet PR60    36   5
##   Inlet WP31B   42   0
##   Outfall ML1   36   5
##   Outfall ML2   23  18
##   Outfall WP26B 37   1
##   Outfall WP26C 38   1
##   Outfall WP26D 46   0
##   Outfall WP31A 41   0
##   Outfall WP31C 31  10
##   Outfall WP31D 36   5
##   Outfall WP31E 35   5
##   Outfall_Inlet 39   1
thermo_y_n_site<- rowPerc(xtabs(~Sampling_Site+thermo_y_n, data=new_wq))
png("../../results/thermo_y_n_site_table.png", height=400, width=400)
p<-tableGrob(thermo_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2

LINE GRAPHS Data: clean_data First lets look at line graphs by sampling site.

library('gridExtra')
library('ggplot2')
#it stays the same but i want to shorten the name
clean_data <- clean_water_quality_data
ML2 <- subset(clean_data, Sampling_Site == "Outfall ML2")
ML1 <- subset(clean_data, Sampling_Site == "Outfall ML1")
PR60 <- subset(clean_data, Sampling_Site == "Inlet PR60")
Inlet_34 <- subset(clean_data, Sampling_Site == "Inlet 3/4")
McCall_Lake <- subset(clean_data, Pond == "McCall_Lake")
Inverness <- subset(clean_data, Pond == "Inverness")
Country_Hills <- subset(clean_data, Pond == "Country_Hills")
#Need to fix as some of it is getting cut off 
#want to add arrows pointing at holidays (https://www.r-bloggers.com/point-arrows-to-specific-parts-of-the-data-2/)
#tried this: geom_segment(aes(x = Date_Sampled, y = 5.5, xend = Date_Sampled, yend = 4.5),
                  #arrow = arrow(length = unit(0.5, "cm")))

HF183_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = HF183, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.67, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 6.3) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "McCall Sampling Sites") +
  annotate("segment", x= as.Date("2017-07-03"), xend = as.Date("2017-07-03"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-05-22"), xend = as.Date("2017-05-22"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-08-07"), xend = as.Date("2017-08-07"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-09-03"), xend = as.Date("2017-09-03"), y = 6, yend = 5, arrow = arrow(), size = 1)
  

HF183_at_McCall

ggsave(filename = "../../results/HF183_at_McCall.png",plot = HF183_at_McCall)
## Saving 7 x 5 in image
HF183_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = HF183, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.67, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 6.3) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Country Hills Sampling Sites") +
  annotate("segment", x= as.Date("2017-07-03"), xend = as.Date("2017-07-03"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-05-22"), xend = as.Date("2017-05-22"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-08-07"), xend = as.Date("2017-08-07"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-09-03"), xend = as.Date("2017-09-03"), y = 6, yend = 5, arrow = arrow(), size = 1)
  

HF183_at_Country_Hills  

ggsave(filename = "../../results/HF183_at_Country_Hills.png",plot = HF183_at_Country_Hills)
## Saving 7 x 5 in image
HF183_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = HF183, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.67, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 6.3) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Inverness Sampling Sites") +
  annotate("segment", x= as.Date("2017-07-03"), xend = as.Date("2017-07-03"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-05-22"), xend = as.Date("2017-05-22"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-08-07"), xend = as.Date("2017-08-07"), y = 6, yend = 5, arrow = arrow(), size = 1) +
  annotate("segment", x= as.Date("2017-09-03"), xend = as.Date("2017-09-03"), y = 6, yend = 5, arrow = arrow(), size = 1)
  
  

HF183_at_Inverness

ggsave(filename = "../../results/HF183_at_Inverness.png",plot = HF183_at_Inverness)
## Saving 7 x 5 in image
HF183_all_arranged <- grid.arrange(HF183_at_McCall, HF183_at_Country_Hills, HF183_at_Inverness)

ggsave(filename = "../../results/HF183_all_arranged.png",plot = HF183_all_arranged)
## Saving 7 x 5 in image
#Need to fix as some of it is getting cut off 

Arco_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 5.2) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "McCall Sampling Sites") +
  annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) + annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) +
 annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) 
  

Arco_at_McCall

ggsave(filename = "../../results/Arco_at_McCall.png",plot = Arco_at_McCall)
## Saving 7 x 5 in image
Arco_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 5.2) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Country Hills Sampling Sites")

Arco_at_Country_Hills  

ggsave(filename = "../../results/Arco_at_Country_Hills.png",plot = Arco_at_Country_Hills)
## Saving 7 x 5 in image
Arco_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 5.2) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Inverness Sampling Sites")
  

Arco_at_Inverness

ggsave(filename = "../../results/Arco_at_Inverness.png",plot = Arco_at_Inverness)
## Saving 7 x 5 in image
Arco_all_arranged <- grid.arrange(Arco_at_Country_Hills, Arco_at_Inverness, Arco_at_McCall)

ggsave(filename = "../../results/Arco_all_arranged.png",plot = Arco_all_arranged)
## Saving 7 x 5 in image
#Need to fix as some of it is getting cut off 

Arco_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 5.2) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "McCall Sampling Sites") +
  annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) + annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) +
 annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) 
  

Arco_at_McCall

ggsave(filename = "../../results/Arco_at_McCall.png",plot = Arco_at_McCall)
## Saving 7 x 5 in image
Arco_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 5.2) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Country Hills Sampling Sites")

Arco_at_Country_Hills  

ggsave(filename = "../../results/Arco_at_Country_Hills.png",plot = Arco_at_Country_Hills)
## Saving 7 x 5 in image
Arco_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 5.2) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Inverness Sampling Sites")
  

Arco_at_Inverness

ggsave(filename = "../../results/Arco_at_Inverness.png",plot = Arco_at_Inverness)
## Saving 7 x 5 in image
Arco_all_arranged <- grid.arrange(Arco_at_Country_Hills, Arco_at_Inverness, Arco_at_McCall)

ggsave(filename = "../../results/Arco_all_arranged.png",plot = Arco_all_arranged)
## Saving 7 x 5 in image
#Need to fix as some of it is getting cut off 

HumM2_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = HumM2, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 4.14, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 6) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "McCall Sampling Sites")
  

HumM2_at_McCall

ggsave(filename = "../../results/HumM2_at_McCall.png",plot = HumM2_at_McCall)
## Saving 7 x 5 in image
HumM2_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = HumM2, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 4.14, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 6) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Country Hills Sampling Sites")

HumM2_at_Country_Hills  

ggsave(filename = "../../results/HumM2_at_Country_Hills.png",plot = HumM2_at_Country_Hills)
## Saving 7 x 5 in image
HumM2_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = HumM2, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 4.14, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(3.2, 6) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Inverness Sampling Sites")
  

HumM2_at_Inverness

ggsave(filename = "../../results/HumM2_at_Inverness.png",plot = HumM2_at_Inverness)
## Saving 7 x 5 in image
HumM2_all_arranged <- grid.arrange(HumM2_at_Country_Hills, HumM2_at_Inverness, HumM2_at_McCall)

ggsave(filename = "../../results/HumM2_all_arranged.png",plot = HumM2_all_arranged)
## Saving 7 x 5 in image
CGO1_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = CG01, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.4, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(2.5, 6) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "McCall Sampling Sites")
  

CGO1_at_McCall

ggsave(filename = "../../results/CGO1_at_McCall.png",plot = CGO1_at_McCall)
## Saving 7 x 5 in image
CG01_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = CG01, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.4, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(2.5, 6) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Country Hills Sampling Sites")

CG01_at_Country_Hills  

ggsave(filename = "../../results/_at_Country_Hills.png",plot = CG01_at_Country_Hills)
## Saving 7 x 5 in image
CG01_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = CG01, color = Sampling_Site)) + geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.4, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(2.5, 6) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
  labs(color = "Inverness Sampling Sites")
  

CG01_at_Inverness

ggsave(filename = "../../results/CG01_at_Inverness.png",plot = CG01_at_Inverness)
## Saving 7 x 5 in image
HumM2_all_arranged <- grid.arrange(CG01_at_Country_Hills, CG01_at_Inverness, CGO1_at_McCall)

ggsave(filename = "../../results/HumM2_all_arranged.png",plot = HumM2_all_arranged)
## Saving 7 x 5 in image
Seagull_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = LeeSg, color = Sampling_Site)) + 
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.28, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(2.5, 5) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 7), legend.title=element_text(size=7)) +
  labs(color = "McCall Sampling Sites") +
  annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5, yend = 4.7, arrow = arrow(), size = 1) + 
  annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
 annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5, yend = 4.7, arrow = arrow(), size = 1)
  


Seagull_at_McCall

ggsave(filename = "../../results/LeeSg_at_McCall.png",plot = Seagull_at_McCall)
## Saving 7 x 5 in image
Seagull_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = LeeSg, color = Sampling_Site)) + 
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.28, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(2.5, 5) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 7), legend.title=element_text(size=7)) +
  labs(color = "Inverness Sampling Sites") +
  annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5, yend = 4.7, arrow = arrow(), size = 1) + 
  annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
 annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5, yend = 4.7, arrow = arrow(), size = 1)
  


Seagull_at_Inverness

ggsave(filename = "../../results/LeeSg_at_Inverness.png",plot = Seagull_at_Inverness)
## Saving 7 x 5 in image
Seagull_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = LeeSg, color = Sampling_Site)) + 
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 3.28, colour = "black", linetype = "dotted") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw() +
  ylim(2.5, 5) +
  theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 7), legend.title=element_text(size=7)) +
  labs(color = "Country Hills Sampling Sites") +
  annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5, yend = 4.7, arrow = arrow(), size = 1) + 
  annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
 annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5, yend = 4.7, arrow = arrow(), size = 1)
  


Seagull_at_Country_Hills

ggsave(filename = "../../results/LeeSg_at_Country_Hills.png",plot = Seagull_at_Country_Hills)
## Saving 7 x 5 in image
Inverness_bird_Arco <- ggplot(Inverness) +
  geom_line(mapping = aes(x = Date_Sampled, y = CG01), color = "dark green") +
  geom_line(mapping = aes(x = Date_Sampled, y = LeeSg), color = "purple") +
  geom_line(mapping = aes(x = Date_Sampled, y = A_butzleri_HSP60), color = "navy") +
  geom_hline(yintercept = 3.4, colour = "dark green", linetype = "dotted") +
  geom_hline(yintercept = 3.58, colour = "purple", linetype = "dotted") + 
  geom_hline(yintercept = 3.69, colour = "navy", linetype = "dotted") +
  geom_point(mapping = aes(x = Date_Sampled, y = A_butzleri_HSP60), color = "navy") +
  geom_point(mapping = aes(x = Date_Sampled, y = CG01), color = "dark green") +
  geom_point(mapping = aes(x = Date_Sampled, y = LeeSg), color = "purple") +
  labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
  theme_bw()

Inverness_bird_Arco

RASTER PLOTS

raster_by_pond_HF183 <- ggplot(clean_data, aes(x = Date_Sampled, y = Pond, fill = HF183)) + geom_raster(interpolate = TRUE) + scale_fill_gradientn(colors = c("blue", "yellow", "red"))
raster_by_pond_HF183
## Warning in f(...): Raster pixels are placed at uneven horizontal intervals
## and will be shifted. Consider using geom_tile() instead.

raster_by_pond_Entero <- ggplot(clean_data, aes(x = Date_Sampled, y = Pond, fill = Enterococcus_CCE)) + geom_raster(interpolate = TRUE) + scale_fill_gradientn(colors = c("blue", "yellow", "purple"))
raster_by_pond_Entero
## Warning in f(...): Raster pixels are placed at uneven horizontal intervals
## and will be shifted. Consider using geom_tile() instead.

HEAT MAP

#heatmap <- heatmap(clean_data, aes(x = , y = pond)) + geom_tile(aes(fill = HF183))

COOCCURANCE PLOTS AND FIGURES

#Two way tables
table_arco_human <- table(new_wq$arco_y_n, new_wq$HF183_y_n)
table_arco_human
##      
##        no yes
##   no  298  96
##   yes  86  53
#shows the numbers
margin.table(table_arco_human)
## [1] 533
margin.table(table_arco_human, 1)
## 
##  no yes 
## 394 139
margin.table(table_arco_human, 2)
## 
##  no yes 
## 384 149
#gets the proportions (percents)
table_arco_human/margin.table(table_arco_human)
##      
##               no        yes
##   no  0.55909944 0.18011257
##   yes 0.16135084 0.09943715
prop.table(table_arco_human) #prop.table is a command that will convert any table to proportions
##      
##               no        yes
##   no  0.55909944 0.18011257
##   yes 0.16135084 0.09943715
#the yes-yes column in this table gives us the total number of samples positive for both variables out of all samples 
#mosiac plot of table
mosaicplot(table_arco_human, shade = TRUE, main= "Two-way table of samples that are positive for Arcobacter butzleri and HF183", xlab = "Arcobacter butzlier", ylab = "HF183")

#instruction from: https://www.cyclismo.org/tutorial/R/tables.html 
mosaicplot(table_arco_human, sort=c(2,1))

library(vcd)
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:mosaic':
## 
##     mplot
#vdc::mosaicplot()
#assoc_1 <- ggplot(data = clean_data) +
  #geom_mosaic(aes(x = A_butzleri_HSP60_A, fill=HF183), divider="hbar", na.rm=TRUE) + labs(x=" ", title='divider = "hbar"') 

CORRELATION

library(corrplot)
## corrplot 0.84 loaded
library(ggplot2)
corr_subset <- clean_data[c(8,9,10,11)]
corr_matrix_test <- cor(corr_subset)
corrplot(corr_matrix_test, method = "square")

#correlation with HF183, HumM2 and Arco
corr_subset <- clean_data[c(8,9,10,11,15)]
corr_matrix_arco_human <- cor(corr_subset)
corr_matrix_arco_human_plot <- corrplot(corr_matrix_arco_human, method = "square")

corr_matrix_arco_human_plot
##                          HF183       HumM2          CG01       LeeSg
## HF183             1.0000000000  0.28689668 -0.0005077393  0.18360616
## HumM2             0.2868966805  1.00000000 -0.0299054267  0.13117928
## CG01             -0.0005077393 -0.02990543  1.0000000000 -0.01105322
## LeeSg             0.1836061565  0.13117928 -0.0110532178  1.00000000
## A_butzleri_HSP60  0.0996056716  0.09316562  0.0625449999  0.16267569
##                  A_butzleri_HSP60
## HF183                  0.09960567
## HumM2                  0.09316562
## CG01                   0.06254500
## LeeSg                  0.16267569
## A_butzleri_HSP60       1.00000000
png(height=1800, width=1800, file="../../results/corrplot_arco_human_bird.png")
corr_plot = corrplot(corr_matrix_arco_human_plot, method = "square", tl.cex = 5)
dev.off()
## quartz_off_screen 
##                 2
#Save it as a table
#png("../../results/corr_matrix_arco_human).png", height=400, width=400)
#p<-tableGrob(corr_matrix_arco_human_plot)
#grid.arrange(p)
#dev.off()
#png("../../results/corr_matrix_arco_human.png", height=400, width=400)
#p<-grob(corr_matrix_arco_human_plot)
#dev.off()
d_cor = clean_data %>% select(A_butzleri_HSP60, HF183, HumM2) %>% cor()
png(height=1800, width=1800, file="../../results/corr_plot_arco_human.png")
corr_plot = corrplot(d_cor, method="color", tl.cex = 5)
dev.off()
## quartz_off_screen 
##                 2

TST

#clean_data %>%
  #t.test(A_butzleri_HSP60, HF183)
ks.test(McCall_Lake$HF183, McCall_Lake$HumM2)
## Warning in ks.test(McCall_Lake$HF183, McCall_Lake$HumM2): p-value will be
## approximate in the presence of ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  McCall_Lake$HF183 and McCall_Lake$HumM2
## D = 0.75, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(McCall_Lake$HF183, McCall_Lake$A_butzleri_HSP60)
## Warning in ks.test(McCall_Lake$HF183, McCall_Lake$A_butzleri_HSP60): p-
## value will be approximate in the presence of ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  McCall_Lake$HF183 and McCall_Lake$A_butzleri_HSP60
## D = 0.57927, p-value < 2.2e-16
## alternative hypothesis: two-sided
t.test(McCall_Lake$HF183, McCall_Lake$HumM2)
## 
##  Welch Two Sample t-test
## 
## data:  McCall_Lake$HF183 and McCall_Lake$HumM2
## t = -5.8662, df = 200.04, p-value = 1.822e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3290068 -0.1634653
## sample estimates:
## mean of x mean of y 
##  3.643604  3.889840
#significant means you fail the null
#the null means they are equal to each other
# p value less than .05 means you reject the null
#these all reject the null are they are worng 

D represents the value of the test-statistic (difference), so the KS-statistic and the p-value represents the likelihood of observing this particular value of D, or a “more extreme” value by pure chance.

LINEAR MODEL - SIMPLE Linear models describe a continuous response variable as a function of one or more predictor variables. They can help you understand and predict the behavior of complex systems or analyze experimental, financial, and biological data.

We want to see if other biological inputs can predict the presence of A. butzleri.

#http://r-statistics.co/Linear-Regression.html
linearMod_HF183 <- lm(A_butzleri_HSP60 ~ HF183, data = clean_data)
print(linearMod_HF183)
## 
## Call:
## lm(formula = A_butzleri_HSP60 ~ HF183, data = clean_data)
## 
## Coefficients:
## (Intercept)        HF183  
##     3.25313      0.07656
modelSummary <- summary(linearMod_HF183)

linearMod_HumM2 <- lm(A_butzleri_HSP60 ~ HumM2, data = clean_data)
linearMod_LeeSg <- lm(A_butzleri_HSP60 ~ LeeSg, data = clean_data)
linearMod_CG01 <- lm(A_butzleri_HSP60 ~ CG01, data = clean_data)

lm_log_HF183 = broom::tidy(linearMod_HF183)
lm_log_HuMm2 = broom::tidy(linearMod_HumM2)
lm_log_LeeSg = broom::tidy(linearMod_LeeSg)
lm_log_CG01 = broom::tidy(linearMod_CG01)

lm_variables = bind_rows(lm_log_HF183, lm_log_HuMm2, lm_log_LeeSg, lm_log_CG01)
lm_variables = lm_variables %>% filter(term != "(Intercept)")

saveRDS(lm_variables, file = "../../results/resulttable_lm.rds")

lm_variables2 = bind_rows(lm_log_HF183, lm_log_HuMm2, lm_log_LeeSg, lm_log_CG01)

lm_variables2 <- lm_variables2
png("../../results/resulttable_lm_3.png", height=400, width=400)
p<-tableGrob(lm_variables2)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
lm_variables <- lm_variables 
png("../../results/resulttable_lm_2.png", height=400, width=400)
p<-tableGrob(lm_variables)
grid.arrange(p)
dev.off()
## quartz_off_screen 
##                 2
#p value is showing if that level within the facotr that you testing is a signifncat predictor our ooutcome 
#if it less than .05 is has a signficiant affect on our outcome
#HF183, HumM2 and LeeSg are all signficant predictors